#!/bin/sh
#
# Runs the hashtag timeline analysis for one topic and archives the dumped
# keys on the local filesystem.
#
# Usage: <this-script> TOPIC
#
# Steps:
#   1. Invoke HashtagsTimelineDriver with the HBase table "twitter<TOPIC>"
#      (-t) and a timestamped HDFS working directory (-o).
#   2. Run seqdumper on every ".../part*" entry listed in that directory and
#      keep only the output lines that start with "Key:".
#   3. Copy the collected lines into ${DATA_DIR}/<TOPIC>/<timestamp>/.
#
# TOPIC is not validated: an empty TOPIC yields the table name "twitter".

# Print a message to stderr and abort with a non-zero status.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

TOPIC=$1
NOW=$(date +'%Y_%m_%d_%Hh%Mm')
DICODE_ANALYSIS_HOME="/usr/lib/dicode/deploy-analysis"
DATA_DIR="${DICODE_ANALYSIS_HOME}/data/hashtags"
# NOTE(review): "hahstags" looks like a typo for "hashtags". It is left
# unchanged because renaming it changes where results are written on HDFS;
# confirm no consumer depends on this path before fixing it.
HDFS_WORKING_DIR="/user/dicode/twitter/hahstags/${TOPIC}/${NOW}"
HBASE_TABLE="twitter${TOPIC}"
CURRENT_DATA_DIR="${DATA_DIR}/${TOPIC}/${NOW}"

# Stop early if the analysis job fails: there would be nothing to dump and
# an empty result file would otherwise be archived silently.
"${DICODE_ANALYSIS_HOME}/dicode-analysis" \
  eu.dicodeproject.analysis.hashtags.HashtagsTimelineDriver \
  -o "${HDFS_WORKING_DIR}/" -t "${HBASE_TABLE}" \
  || die "HashtagsTimelineDriver failed for topic '${TOPIC}'"

# mktemp replaces the deprecated, Debian-only tempfile(1); the "twiho" name
# prefix is preserved because the file is copied under its own name below.
TEMPFILE=$(mktemp "${TMPDIR:-/tmp}/twihoXXXXXX") \
  || die "cannot create temporary file"
# Remove the temp file on every exit path; the signal trap turns an
# interrupt into a regular exit so the EXIT trap also runs in that case.
trap 'rm -f -- "${TEMPFILE}"' EXIT
trap 'exit 1' HUP INT TERM

# List the working directory, reduce each matching line to its trailing
# ".../part*" path, dump every such file and keep only the "Key:" lines.
# grep -F matches the directory literally instead of as a regex.
# -I {} already processes one input line per command, so -l1 is dropped.
hadoop fs -ls "${HDFS_WORKING_DIR}" \
  | grep -F -- "${HDFS_WORKING_DIR}/part" \
  | sed -e 's!.* \([^ ]*/part.*\)$!\1!g' \
  | xargs -I {} "${DICODE_ANALYSIS_HOME}/dicode-analysis" seqdumper -s {} \
  | grep '^Key:' > "${TEMPFILE}"

mkdir -p -- "${CURRENT_DATA_DIR}" || die "cannot create ${CURRENT_DATA_DIR}"
cp -- "${TEMPFILE}" "${CURRENT_DATA_DIR}" \
  || die "cannot copy results to ${CURRENT_DATA_DIR}"
# Disabled alternative kept from the original: store the lines sorted
# numerically (descending) on field 5 under a descriptive file name.
#sort -nr -k5 "${TEMPFILE}" > "${CURRENT_DATA_DIR}/${TOPIC}_${NOW}"

